clear all
set more off


* Prepare baseline teacher information.
* Only the merge key (student_no) and three teacher covariates are loaded;
* rows flagged endline == 1 are excluded while reading the file.
use student_no age_tchr gender_tchr edu_tchr endline if endline != 1 ///
    using "$path_data/original_teacher.dta", clear
drop endline

* Sorted by the merge key and stored as a temp file for the teacher merge
* performed later in this script.
sort student_no
save "$path_data/temp/teacher", replace

* Load the follow-up student survey; the first spreadsheet row holds the
* variable names.
import excel "$path_data/followup_students_master.xlsx", firstrow clear
gen student_no = q1b

* Remove the incomplete interview.
drop if q0a == "319003"

* Yes/No items are coded 1/2 in the sheet: turn No (2) into 0 so that
* Yes == 1 and No == 0.
foreach item in q2a q2c q2h q3a q3e q4a q7a q7b q9a1 q9a3 q9b1 {
    replace `item' = 0 if `item' == 2
}

* q9b1 additionally carries code 3 (don't know), which becomes missing.
replace q9b1 = . if q9b1 == 3




// other changes

* PSC grade: q2k2 is a text column. Entries that carry no grade ("Auto",
* "mone nai", blank) are first mapped to "0" so the column can be converted
* to numeric, and every 0 is then turned into missing.
gen PSC_grade = q2k2
replace PSC_grade = "0" if inlist(q2k2, "Auto", "mone nai", "")
replace PSC_grade = "3.08" if q2k2 == "3.o8"   // letter o typed instead of zero
destring PSC_grade, replace
replace PSC_grade = . if PSC_grade == 0

* JSC grade: q2l2 is a text column. JSC_auto flags the listed text entries
* (spelling variants of "auto pass", plus one note that the school gave no
* result). The list is split over two inlist() calls because inlist()
* accepts at most 10 string arguments.
gen JSC_grade = q2l2
gen JSC_auto = inlist(q2l2, "Ato pas", "Atou pass", "Auto", "Auto  pass", "Auto Pass") ///
    | inlist(q2l2, "Auto pass", "Autopash.", "Autopass", "auto pass", "result school thake deyni")

* Flagged and blank entries are mapped to "0" so destring succeeds, and 0 is
* then converted to missing.
replace JSC_grade = "0" if JSC_auto == 1 | q2l2 == ""
destring JSC_grade, replace
replace JSC_grade = . if JSC_grade == 0


* One 0/1 indicator per answer code of q5a (codes 1-16) and q5b (codes 1-9).
* A missing answer yields 0 in every indicator.
local q5an 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
foreach code of local q5an {
    gen q5a_`code' = (q5a == `code')
}

local q5bn 1 2 3 4 5 6 7 8 9
foreach code of local q5bn {
    gen q5b_`code' = (q5b == `code')
}



* Score the q6a items: 1 when the recorded answer equals the key value,
* 0 otherwise (including when the answer is missing).
gen q6a1_correct  = (q6a1  == 10800)
gen q6a2_correct  = (q6a2  == 9)
gen q6a3a_correct = (q6a3a == 70)
gen q6a3b_correct = (q6a3b == 50)
gen q6a4_correct  = (q6a4  == 20)
gen q6a5_correct  = (q6a5  == 5)

* Cleaned follow-up survey, stored for the merges below.
save "$path_data/temp/followup_student_data", replace
 

* Load the supplementary follow-up sheet and attach its q3c1new / q3c2new /
* q3e answers to the cleaned follow-up data, matching on q1b.
import excel "$path_data/followup_students_extra.xlsx",  clear first
* Drop two specific submissions identified by q1b and _index.
* NOTE(review): presumably duplicate entries for the same q1b, removed so the
* 1:1 merge below has a unique key -- confirm against the raw sheet.
drop if q1b==1223 & _index==64
drop if q1b==2804 & _index==116
keep q1b q3c1new q3c2new q3e _index
* Renamed so it does not collide with q3e already present in the main data.
rename q3e q3enew
recode q3enew(2=0)
destring q1b, replace
merge 1:1 q1b using "$path_data/temp/followup_student_data"
* The temp file is overwritten here, before the drops below, so the saved
* copy still contains _merge and any rows found only in the extra sheet
* (_merge==1).
* NOTE(review): confirm that keeping extra-only rows in the saved file is
* intended; they would have no student_no from the main data.
save "$path_data/temp/followup_student_data", replace


// check the accuracy of q3c
* The statements below only affect the data in memory (it is replaced by the
* next use command); they are diagnostics, not part of the saved file.
drop if _merge==1
drop _merge
sum q3c1new
sum q3c2new
* tab treatment q3enew
tab q3e q3enew

// merge with baseline & endline

* Baseline raw scores: keep the merge key and the three pre-treatment scores.
use "$path_data/original_raw_score", clear
keep student_no DT_score_pre cpcs_pre rosen_pre
save "$path_data/temp/rawscore", replace

* Start from the follow-up data (dropping the _merge left by the earlier
* merge) and attach the main baseline file, then the raw scores. Each merge
* result is stored under its own name.
use "$path_data/temp/followup_student_data", clear
drop _merge
merge 1:1 student_no using "$path_data/original_main", generate(_merge_base_character)
merge 1:1 student_no using "$path_data/temp/rawscore", generate(_merge_base_score)
tab treatment _merge_base_score

* attrition == 0 only for students matched in the merge with original_main;
* every other row (including rows added by the raw-score merge) gets 1.
gen attrition = (_merge_base_character != 3)
tab attrition

// fix missing values using baseline information
* Student 1817: school_no is filled only where it is missing; treatment,
* grade and the branch indicators are set unconditionally.
local is1817 "student_no == 1817"
replace school_no = 18 if school_no == . & `is1817'
replace treatment = 1 if `is1817'
replace grade = 2 if `is1817'
forvalues b = 1/4 {
    * branch3 is the only indicator switched on for this student
    replace branch`b' = (`b' == 3) if `is1817'
}

* Full (unbalanced) sample, attriters included.
save "$path_data/temp/student_unbalance", replace


// keep balanced panel
drop if attrition != 0

save "$path_data/temp/endline_followup_student_data", replace


* Cognitive score: number of correct answers among the six q6a items.
gen followup_cog = q6a1_correct + q6a2_correct + q6a3a_correct + q6a3b_correct + q6a4_correct + q6a5_correct

// non-cog
// positive: 2,3,5,7,10,11,12,17,18,20,21,22,23,25,26,27,28,29,32,33,34,36,37,39
// positive-cog:1,13,14,19,24,
// negative: 4,6,8,9,30,31,35,38,40

* Code 99 in the q6c / q8a items is converted to missing before any score
* is built, so a 99 can never enter a sum as a number.
local q99 q6c1 q6c2 q6c3 q6c4 q6c5 q6c6 q6c7 q6c8 q6c9 q6c10 q6c11 q6c12 q6c13 q6c14 q6c15 q6c16 q6c17 q6c18 q6c19 q6c20 ///
q6c21 q6c22 q6c23 q6c24 q6c25 q6c26 q6c27 q6c28 q6c29 q6c30 q6c31 q6c32 q6c33 q6c34 q6c35 q6c36 q6c37 q6c38 q6c39 q6c40 ///
q8a1a q8a2a q8a3a q8a4a q8a5a

foreach y of local q99 {
    replace `y' = . if `y' == 99
}

* Negatively worded items are reverse-coded as 5 - answer.
foreach k in 4 6 8 9 30 31 35 38 40 {
    gen noncog`k' = 5 - q6c`k'
}

* followup_noncog : 38 items (q6c15 and q6c16 are not part of the score).
* followup_noncog2: the same list without the "positive-cog" items
*                   1, 13, 14, 19 and 24, i.e. 33 items.
* Any missing item makes the whole sum missing.
gen followup_noncog = q6c1+q6c2+q6c3+noncog4+q6c5+noncog6+q6c7+noncog8+noncog9+q6c10+q6c11+q6c12+q6c13+q6c14+q6c17+q6c18+q6c19+q6c20+q6c21+q6c22+q6c23+q6c24+q6c25+q6c26+q6c27+q6c28+q6c29+noncog30+noncog31+q6c32+q6c33+q6c34+noncog35+q6c36+q6c37+noncog38+noncog40+q6c39
gen followup_noncog2 = q6c2+q6c3+noncog4+q6c5+noncog6+q6c7+noncog8+noncog9+q6c10+q6c11+q6c12+q6c17+q6c18+q6c20+q6c21+q6c22+q6c23+q6c25+q6c26+q6c27+q6c28+q6c29+noncog30+noncog31+q6c32+q6c33+q6c34+noncog35+q6c36+q6c37+noncog38+noncog40+q6c39

sum followup_noncog followup_noncog2

* Flip each total with the rule used for RSES and CPCS below:
* (5 x number of items) - sum.
replace followup_noncog = 190 - followup_noncog      // 5 x 38 items
* FIX: the constant was 180, which does not match the 33 items in the sum
* and shifted every value of followup_noncog2 upward by 15.
replace followup_noncog2 = 165 - followup_noncog2    // 5 x 33 items

* Sub-scales built with the same flip: 8 items (40) and 10 items (50).
gen RSES = 40 - q6c2 - q6c3 - noncog4 - noncog6 - noncog8 - noncog9 - q6c10 - q6c11
gen CPCS = 50 - q6c2 - q6c3 - noncog4 - q6c5 - noncog6 -q6c7 - noncog8 - noncog9 - q6c10 - q6c11

// variables for study situation
* Each indicator is 1 when the source item equals 1 and 0 otherwise
* (a missing source value therefore gives 0).
gen tutor              = (q3a == 1)
gen study_other        = (q4a == 1)
gen study_affect_covid = (q9a21 == 1)
gen hometutoring       = (q9a2a1 == 1)
gen onlineclass        = (q9a2a2 == 1)
gen studymyself        = (q9a2a3 == 1)
gen parentsteach       = (q9a2a4 == 1)

// other variable
* phone_survey is 1 when q1a0 equals 2, and 0 otherwise.
gen phone_survey = (q1a0 == 2)

// Standardization
* For every measure listed, create <name>_std = (value - mean) / sd, where
* mean and sd are taken over all observations currently in memory. The
* helper columns <name>_mean and <name>_sd are removed again after use.
foreach v in DT_score_pre cpcs_pre rosen_pre followup_cog followup_noncog CPCS RSES {
    egen `v'_mean = mean(`v')
    egen `v'_sd = sd(`v')
    gen `v'_std = (`v' - `v'_mean) / `v'_sd
    drop `v'_mean `v'_sd
}

// missing
* For each standardized baseline score create
*   <score>_missing_dummy : 1 when the score is missing, 0 otherwise
*   <score>_missing_0     : the score itself, with missing replaced by 0
* so the pair can be used together as regressors without losing rows.
foreach v in DT_score_pre_std cpcs_pre_std rosen_pre_std {
    gen `v'_missing_dummy = (`v' == .)
}

* FIX: the DT_score_pre_std version used to test "DT_score_pre_std_missing",
* a name that exists only as an abbreviation of DT_score_pre_std_missing_dummy
* and fails under "set varabbrev off". All three scores now use the same
* explicit missing-value test.
foreach v in DT_score_pre_std cpcs_pre_std rosen_pre_std {
    gen `v'_missing_0 = cond(`v' == ., 0, `v')
}

* hyper is 1 for the answer pairs (q7d2a, q7d2b) = (1,2), (1,3) or (2,3),
* and 0 for every other pair.
gen hyper = (q7d2a == 1 & inlist(q7d2b, 2, 3)) | (q7d2a == 2 & q7d2b == 3)

* hypernoinfo marks respondents with no q7d2a answer; for them hyper is set
* to missing rather than 0.
gen hypernoinfo = (q7d2a == .)
replace hyper = . if hypernoinfo == 1



// merge teacher information
* Attach the teacher covariates prepared at the top of the script; the merge
* result is kept as _merge_teacher.
* NOTE(review): no keep() option is used, so rows present only in the teacher
* file are added to the data -- confirm this is intended.
merge 1:1 student_no using "$path_data/temp/teacher", generate(_merge_teacher)

* Missing teacher age is zero-filled first; the dummy then flags every row
* whose age is 0 after that fill.
replace age_tchr = 0 if age_tchr == .
gen age_tchr_missing_dummy = (age_tchr == 0)


save "$path_data/temp/followup_student_baseline_score_missing_dummy", replace



